package com.mano.demo;

import java.util.BitSet;
import java.util.Objects;

/**
 * A simple in-memory Bloom filter for strings such as crawled URLs.
 *
 * <p>Notes from the original author: the false-positive rate is intended to stay
 * below one in ten thousand, budgeting roughly 4 bytes of filter capacity per URL.
 * Because a Bloom filter can report false positives, a whitelist can be kept
 * alongside it to let through URLs the filter cannot decide on with certainty.
 *
 * <p>The filter is backed by a {@link BitSet} of {@code 2 << 24} bits and sets one
 * bit per hash function for every added value. This class performs no
 * synchronization of its own.
 *
 * <p>Created in 15:11 2020/9/7.
 *
 * @author zj
 */
public class SimpleBloomFilter {

    /**
     * Number of bits in the filter ({@code 2 << 24} = 33,554,432). It must remain a
     * power of two because {@link SimpleHash#hash(String)} reduces the hash with a
     * {@code cap - 1} bit mask.
     */
    private static final int DEFAULT_SIZE = 2 << 24;

    /** Multipliers of the polynomial hash functions; one hash function per seed. */
    private static final int[] SEEDS = {7, 11, 13, 31, 37, 61};

    /** Bit array holding the filter state. */
    private final BitSet bits = new BitSet(DEFAULT_SIZE);

    /** Hash functions, one for each entry of {@link #SEEDS}. */
    private final SimpleHash[] func = new SimpleHash[SEEDS.length];

    /** Creates an empty filter with one hash function per seed. */
    public SimpleBloomFilter() {
        for (int i = 0; i < SEEDS.length; i++) {
            func[i] = new SimpleHash(DEFAULT_SIZE, SEEDS[i]);
        }
    }

    /**
     * Records a value in the filter.
     *
     * @param value the string to add; {@code null} is ignored, mirroring
     *              {@link #contains(String)}, which reports {@code null} as absent
     */
    public void add(String value) {
        if (value == null) {
            return;
        }
        for (SimpleHash f : func) {
            bits.set(f.hash(value));
        }
    }

    /**
     * Records the original URL of the given crawler URL in the filter.
     *
     * @param value the URL holder; {@code null} (or a {@code null} original URL) is ignored
     */
    public void add(CrawlerUrl value) {
        if (value != null) {
            add(value.getOriUrl());
        }
    }

    /**
     * Tests whether a value may have been added to the filter.
     *
     * @param value the string to look up; may be {@code null}
     * @return {@code false} if the value is {@code null} or was definitely never added;
     *         {@code true} if every bit selected by the hash functions is set, which
     *         means the value was probably added (false positives are possible)
     */
    public boolean contains(String value) {
        if (value == null) {
            return false;
        }
        for (SimpleHash f : func) {
            // A single unset bit proves the value was never added.
            if (!bits.get(f.hash(value))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether the original URL of the given crawler URL may have been added.
     *
     * @param value the URL holder; may be {@code null}
     * @return {@code false} if {@code value} is {@code null}; otherwise the result of
     *         {@link #contains(String)} for its original URL
     */
    public boolean contains(CrawlerUrl value) {
        return value != null && contains(value.getOriUrl());
    }

    /**
     * Small demo: prints whether a sample value is reported as present before and
     * after it is added.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String value = "stoene21312@yahoo.cn";
        SimpleBloomFilter bloomFilter = new SimpleBloomFilter();
        System.out.println(bloomFilter.contains(value));
        bloomFilter.add(value);
        System.out.println(bloomFilter.contains(value));
    }

    /**
     * Seeded polynomial string hash whose result is reduced to a bit index by
     * masking with {@code cap - 1}.
     */
    public static class SimpleHash {
        private final int cap;
        private final int seed;

        /**
         * @param cap  size of the target bit array; the mask in {@link #hash(String)}
         *             only reaches every index in {@code [0, cap)} when this is a
         *             power of two
         * @param seed multiplier applied to the running hash for each character
         */
        public SimpleHash(int cap, int seed) {
            this.cap = cap;
            this.seed = seed;
        }

        /**
         * Hashes a string to a bit index.
         *
         * @param value the string to hash; must not be {@code null}
         * @return {@code (cap - 1) & h}, where {@code h} is the polynomial hash of the
         *         characters of {@code value}
         * @throws NullPointerException if {@code value} is {@code null}
         */
        public int hash(String value) {
            Objects.requireNonNull(value, "value");
            int result = 0;
            int len = value.length();
            for (int i = 0; i < len; i++) {
                // int overflow wraps around, which is fine for hashing.
                result = seed * result + value.charAt(i);
            }
            return (cap - 1) & result;
        }
    }
}
